# Download data
url <- "https://www.ncbi.nlm.nih.gov/geo/download/?acc=GSE137001&format=file&file=GSE137001%5FTPM%5Freprogramming%5Fintermediates%2Exlsx"
filename <- "GSE137001_TPM_reprogramming_intermediates.xlsx"
if (!file.exists(filename)) {
download.file(url, filename)
}
tpms <- read_excel(filename)
# Generate metadata
metadata <- tibble(sample_name = colnames(tpms[0, 3:41])) %>%
mutate(state = case_when(
startsWith(sample_name, "d") ~ gsub("(d\\d+)_.+", "\\1", sample_name),
grepl("MEF", sample_name) ~ "MEF",
grepl("iPSC", sample_name) ~ "iPSC",
grepl("ESC", sample_name) ~ "ESC"
)) %>%
mutate(factors = case_when(
grepl("OSKM", sample_name) ~ "OSKM",
grepl("SKM", sample_name) ~ "SKM",
TRUE ~ "NA"
)) %>%
mutate(adhesion = case_when(
grepl("Epcam", sample_name) ~ "Epcam",
grepl("GFP", sample_name) ~ "GFP",
TRUE ~ "NA"
)) %>%
mutate(rep = case_when(
grepl("_R1", sample_name) ~ "1",
grepl("_R2", sample_name) ~ "2"
)) %>%
mutate_if(is.character, as.factor) %>%
mutate(state = ordered(state,
levels = c("MEF", "d2", "d4", "d6", "d9", "d12", "iPSC", "ESC")
))
PCA
# Log transform data for PCA
tpms_mat <- tpms %>% select(3:41) %>% as.matrix() %>% t()
trans_mat <- log10(1 + (tpms_mat))
pca_res <- stats::prcomp(trans_mat)
pca_tbl <- pca_res$x %>%
as.data.frame() %>%
rownames_to_column("sample_name") %>%
left_join(metadata, by = c("sample_name"))
pca_tbl %>%
ggplot(aes(
x = PC1, y = PC2, color = state, shape = factors
)) +
geom_point(size = 3) +
geom_text(aes(label = state), hjust = -0.2, vjust = -0.2)

# Get gene list to filter
stress_pathways <- c("GO:0036500") # Just the ATF6 pathway
# stress_pathways <- c("GO:0009267", "GO:0034198", "GO:0036003", "GO:0036500", "GO:1990928", "GO:0034599", "GO:0034976", "GO:0016236", "GO:0010506")
gostres <- gprofiler2::gost(stress_pathways)
stress_ensg <- gostres$meta$genes_metadata$query$query_1$ensgs
ens2sym <- AnnotationDbi::select(
EnsDb.Hsapiens.v86,
keys = keys(EnsDb.Hsapiens.v86),
columns = c("SYMBOL")
)
stress_genes <- ens2sym %>%
filter(GENEID %in% stress_ensg) %>%
pull(SYMBOL)
# Filter data for selected stress genes
tpms$Name <- unlist(lapply(tpms$Name, toupper)) # Capitalize gene names
cat("% stress genes in dataset:", 100*mean(stress_genes %in% tpms$Name))
% stress genes in dataset: 100
stress_tpms <- tpms %>% filter(Name %in% stress_genes)
# Scale the TPMs of each gene to be between 0 and 1
# Note: I couldn't figure out how to scale row-wise, so I transposed instead
scaled_tpms <- stress_tpms[3:41] %>%
t() %>%
as.data.frame() %>%
mutate_all(scales::rescale) %>% # Min-max scaling to be between 0 and 1
t() %>%
as_tibble() %>%
mutate(gene = stress_tpms$Name) %>%
relocate(gene)
# Long format data to play nice with ggplot
long_df <- scaled_tpms %>%
pivot_longer(
contains("_")
) %>%
rename(sample_name = name) %>%
rename(scaled_tpm = value) %>%
left_join(metadata, by = "sample_name")
# Fake MEF rows to include in facet wrap
fake_rows <- long_df %>% filter(state == "MEF") %>% mutate(factors = "SKM") %>%
rbind(long_df %>% filter(state == "MEF") %>% mutate(factors = "OSKM"))
Boxplot
long_df %>%
rbind(fake_rows) %>% # So we see MEF data when facet wrapped by "factors"
filter(factors != "NA") %>%
ggplot(aes(x = state, y = scaled_tpm)) +
stat_boxplot(geom ='errorbar', width = 0.5) +
geom_boxplot() +
facet_wrap(~factors) +
geom_jitter(color = "blue", width = 0.1)

Line plot
ggp <- long_df %>%
filter(factors != "NA") %>%
rbind(fake_rows) %>%
group_by(factors, state, gene) %>%
summarize(
mean_tpm = mean(scaled_tpm),
max_tpm = max(scaled_tpm),
min_tpm = min(scaled_tpm),
.groups = "keep"
) %>%
ungroup() %>%
ggplot(aes(
x = state,
y = mean_tpm,
ymin = min_tpm,
ymax = max_tpm,
group = factors,
color = factors
)) +
geom_line() +
geom_ribbon(alpha = 0.5) +
facet_wrap(~gene, ncol = 2) +
xlab("scale_TPM")
ggplotly(ggp)
LS0tCnRpdGxlOiAiQXZlcmFnZSBUUE0gQWNyb3NzIFRpbWUgb2YgU2VsZWN0IEdlbmVzIChHU0UxMzcwMDEpIgphdXRob3I6ICJKYW1lcyBEYW8iCm91dHB1dDoKICBodG1sX25vdGVib29rOgogICAgdG9jOiB0cnVlCiAgICB0b2NfZmxvYXQ6IHRydWUKICAgIGNvZGVfZm9sZGluZzogaGlkZQogICAgdGhlbWU6IGNlcnVsZWFuCi0tLQoKYGBge3IgaW5jbHVkZT1GQUxTRX0KbGlicmFyeShyZWFkeGwpCmxpYnJhcnkoRW5zRGIuSHNhcGllbnMudjg2KQpsaWJyYXJ5KERFU2VxMikKbGlicmFyeSh0aWR5dmVyc2UpCmxpYnJhcnkocGxvdGx5KQpgYGAKCgpgYGB7cn0KIyBEb3dubG9hZCBkYXRhCnVybCA8LSAiaHR0cHM6Ly93d3cubmNiaS5ubG0ubmloLmdvdi9nZW8vZG93bmxvYWQvP2FjYz1HU0UxMzcwMDEmZm9ybWF0PWZpbGUmZmlsZT1HU0UxMzcwMDElNUZUUE0lNUZyZXByb2dyYW1taW5nJTVGaW50ZXJtZWRpYXRlcyUyRXhsc3giCmZpbGVuYW1lIDwtICJHU0UxMzcwMDFfVFBNX3JlcHJvZ3JhbW1pbmdfaW50ZXJtZWRpYXRlcy54bHN4IgppZiAoIWZpbGUuZXhpc3RzKGZpbGVuYW1lKSkgewogIGRvd25sb2FkLmZpbGUodXJsLCBmaWxlbmFtZSkKfQp0cG1zIDwtIHJlYWRfZXhjZWwoZmlsZW5hbWUpCmBgYAoKCmBgYHtyfQojIEdlbmVyYXRlIG1ldGFkYXRhCm1ldGFkYXRhIDwtIHRpYmJsZShzYW1wbGVfbmFtZSA9IGNvbG5hbWVzKHRwbXNbMCwgMzo0MV0pKSAlPiUKICBtdXRhdGUoc3RhdGUgPSBjYXNlX3doZW4oCiAgICBzdGFydHNXaXRoKHNhbXBsZV9uYW1lLCAiZCIpIH4gZ3N1YigiKGRcXGQrKV8uKyIsICJcXDEiLCBzYW1wbGVfbmFtZSksCiAgICBncmVwbCgiTUVGIiwgc2FtcGxlX25hbWUpIH4gIk1FRiIsCiAgICBncmVwbCgiaVBTQyIsIHNhbXBsZV9uYW1lKSB+ICJpUFNDIiwKICAgIGdyZXBsKCJFU0MiLCBzYW1wbGVfbmFtZSkgfiAiRVNDIgogICkpICU+JQogIG11dGF0ZShmYWN0b3JzID0gY2FzZV93aGVuKAogICAgZ3JlcGwoIk9TS00iLCBzYW1wbGVfbmFtZSkgfiAiT1NLTSIsCiAgICBncmVwbCgiU0tNIiwgc2FtcGxlX25hbWUpIH4gIlNLTSIsCiAgICBUUlVFIH4gIk5BIgogICkpICU+JQogIG11dGF0ZShhZGhlc2lvbiA9IGNhc2Vfd2hlbigKICAgIGdyZXBsKCJFcGNhbSIsIHNhbXBsZV9uYW1lKSB+ICJFcGNhbSIsCiAgICBncmVwbCgiR0ZQIiwgc2FtcGxlX25hbWUpIH4gIkdGUCIsCiAgICBUUlVFIH4gIk5BIgogICkpICU+JQogICAgbXV0YXRlKHJlcCA9IGNhc2Vfd2hlbigKICAgIGdyZXBsKCJfUjEiLCBzYW1wbGVfbmFtZSkgfiAiMSIsCiAgICBncmVwbCgiX1IyIiwgc2FtcGxlX25hbWUpIH4gIjIiCiAgKSkgJT4lCiAgbXV0YXRlX2lmKGlzLmNoYXJhY3RlciwgYXMuZmFjdG9yKSAlPiUKICBtdXRhdGUoc3RhdGUgPSBvcmRlcmVkKHN0YXRlLAogICAgbGV2ZWxzID0gYygiTUVGIiwgImQyIiwgImQ0IiwgImQ2IiwgImQ5IiwgImQxMiIsICJpUFNDIiwgIkVTQyIpCiAgKSkKYGBgCgoKIyBQQ0EKCmBgYHtyfQojIExvZyB0cmFuc2Zvcm0gZGF0YSBmb3IgUENBCnRwbXNfbWF0IDwtIHRwbXMgJT4lIHNlbGVjdCgzOjQxKSAlPiUgYXMubWF0cml4KCkgJT4lIHQoKQp0cmFuc19tYXQgPC0gbG9nMTAoMSArICh0cG1zX21hdCkpCgpwY2FfcmVzIDwtIHN0YXRzOjpwcmNvbXAodHJhbnNfbWF0KSAKCnBjYV90YmwgPC0gcGNhX3JlcyR4ICU+JQogIGFzLmRhdGEuZnJhbWUoKSAlPiUKICByb3duYW1lc190b19jb2x1bW4oInNhbXBsZV9uYW1lIikgJT4lCiAgbGVmdF9qb2luKG1ldGFkYXRhLCBieSA9IGMoInNhbXBsZV9uYW1lIikpCgpwY2FfdGJsICU+JQogIGdncGxvdChhZXMoCiAgICB4ID0gUEMxLCB5ID0gUEMyLCBjb2xvciA9IHN0YXRlLCBzaGFwZSA9IGZhY3RvcnMKICApKSArIAogIGdlb21fcG9pbnQoc2l6ZSA9IDMpICsKICBnZW9tX3RleHQoYWVzKGxhYmVsID0gc3RhdGUpLCBoanVzdCA9IC0wLjIsIHZqdXN0ID0gLTAuMikKYGBgCgoKYGBge3J9CiMgR2V0IGdlbmUgbGlzdCB0byBmaWx0ZXIKc3RyZXNzX3BhdGh3YXlzIDwtIGMoIkdPOjAwMzY1MDAiKSAgIyBKdXN0IHRoZSBBVEY2IHBhdGh3YXkKIyBzdHJlc3NfcGF0aHdheXMgPC0gYygiR086MDAwOTI2NyIsICJHTzowMDM0MTk4IiwgIkdPOjAwMzYwMDMiLCAiR086MDAzNjUwMCIsICJHTzoxOTkwOTI4IiwgIkdPOjAwMzQ1OTkiLCAiR086MDAzNDk3NiIsICJHTzowMDE2MjM2IiwgIkdPOjAwMTA1MDYiKQoKZ29zdHJlcyA8LSBncHJvZmlsZXIyOjpnb3N0KHN0cmVzc19wYXRod2F5cykKc3RyZXNzX2Vuc2cgPC0gZ29zdHJlcyRtZXRhJGdlbmVzX21ldGFkYXRhJHF1ZXJ5JHF1ZXJ5XzEkZW5zZ3MKCmVuczJzeW0gPC0gQW5ub3RhdGlvbkRiaTo6c2VsZWN0KAogIEVuc0RiLkhzYXBpZW5zLnY4NiwKICBrZXlzID0ga2V5cyhFbnNEYi5Ic2FwaWVucy52ODYpLAogIGNvbHVtbnMgPSBjKCJTWU1CT0wiKQopCgpzdHJlc3NfZ2VuZXMgPC0gZW5zMnN5bSAlPiUKICBmaWx0ZXIoR0VORUlEICVpbiUgc3RyZXNzX2Vuc2cpICU+JQogIHB1bGwoU1lNQk9MKQpgYGAKCgpgYGB7cn0KIyBGaWx0ZXIgZGF0YSBmb3Igc2VsZWN0ZWQgc3RyZXNzIGdlbmVzCnRwbXMkTmFtZSA8LSB1bmxpc3QobGFwcGx5KHRwbXMkTmFtZSwgdG91cHBlcikpICAjIENhcGl0YWxpemUgZ2VuZSBuYW1lcwpjYXQoIiUgc3RyZXNzIGdlbmVzIGluIGRhdGFzZXQ6IiwgMTAwKm1lYW4oc3RyZXNzX2dlbmVzICVpbiUgdHBtcyROYW1lKSkKCnN0cmVzc190cG1zIDwtIHRwbXMgJT4lIGZpbHRlcihOYW1lICVpbiUgc3RyZXNzX2dlbmVzKQpgYGAKCgpgYGB7cn0KIyBTY2FsZSB0aGUgVFBNcyBvZiBlYWNoIGdlbmUgdG8gYmUgYmV0d2VlbiAwIGFuZCAxCiMgTm90ZTogSSBjb3VsZG4ndCBmaWd1cmUgb3V0IGhvdyB0byBzY2FsZSByb3ctd2lzZSwgc28gSSB0cmFuc3Bvc2VkIGluc3RlYWQKc2NhbGVkX3RwbXMgPC0gc3RyZXNzX3RwbXNbMzo0MV0gJT4lCiAgdCgpICU+JQogIGFzLmRhdGEuZnJhbWUoKSAlPiUKICBtdXRhdGVfYWxsKHNjYWxlczo6cmVzY2FsZSkgJT4lICAjIE1pbi1tYXggc2NhbGluZyB0byBiZSBiZXR3ZWVuIDAgYW5kIDEKICB0KCkgJT4lCiAgYXNfdGliYmxlKCkgJT4lCiAgbXV0YXRlKGdlbmUgPSBzdHJlc3NfdHBtcyROYW1lKSAlPiUKICByZWxvY2F0ZShnZW5lKQpgYGAKCgpgYGB7cn0KIyBMb25nIGZvcm1hdCBkYXRhIHRvIHBsYXkgbmljZSB3aXRoIGdncGxvdApsb25nX2RmIDwtIHNjYWxlZF90cG1zICU+JQogIHBpdm90X2xvbmdlcigKICAgIGNvbnRhaW5zKCJfIikKICApICU+JQogIHJlbmFtZShzYW1wbGVfbmFtZSA9IG5hbWUpICU+JQogIHJlbmFtZShzY2FsZWRfdHBtID0gdmFsdWUpICU+JQogIGxlZnRfam9pbihtZXRhZGF0YSwgYnkgPSAic2FtcGxlX25hbWUiKQoKIyBGYWtlIE1FRiByb3dzIHRvIGluY2x1ZGUgaW4gZmFjZXQgd3JhcApmYWtlX3Jvd3MgPC0gbG9uZ19kZiAlPiUgZmlsdGVyKHN0YXRlID09ICJNRUYiKSAlPiUgbXV0YXRlKGZhY3RvcnMgPSAiU0tNIikgJT4lCiAgcmJpbmQobG9uZ19kZiAlPiUgZmlsdGVyKHN0YXRlID09ICJNRUYiKSAlPiUgbXV0YXRlKGZhY3RvcnMgPSAiT1NLTSIpKQogIApgYGAKCgojIEJveHBsb3QKCmBgYHtyfQpsb25nX2RmICU+JQogIHJiaW5kKGZha2Vfcm93cykgJT4lICAjIFNvIHdlIHNlZSBNRUYgZGF0YSB3aGVuIGZhY2V0IHdyYXBwZWQgYnkgImZhY3RvcnMiCiAgZmlsdGVyKGZhY3RvcnMgIT0gIk5BIikgJT4lCiAgZ2dwbG90KGFlcyh4ID0gc3RhdGUsIHkgPSBzY2FsZWRfdHBtKSkgKwogIHN0YXRfYm94cGxvdChnZW9tID0nZXJyb3JiYXInLCB3aWR0aCA9IDAuNSkgKwogIGdlb21fYm94cGxvdCgpICsKICBmYWNldF93cmFwKH5mYWN0b3JzKSArIAogIGdlb21faml0dGVyKGNvbG9yID0gImJsdWUiLCB3aWR0aCA9IDAuMSkKYGBgCgoKIyBMaW5lIHBsb3QKCmBgYHtyIGZpZy53aWR0aD04fQpnZ3AgPC0gbG9uZ19kZiAlPiUKICBmaWx0ZXIoZmFjdG9ycyAhPSAiTkEiKSAlPiUKICByYmluZChmYWtlX3Jvd3MpICU+JQogIGdyb3VwX2J5KGZhY3RvcnMsIHN0YXRlLCBnZW5lKSAlPiUKICBzdW1tYXJpemUoCiAgICBtZWFuX3RwbSA9IG1lYW4oc2NhbGVkX3RwbSksCiAgICBtYXhfdHBtID0gbWF4KHNjYWxlZF90cG0pLAogICAgbWluX3RwbSA9IG1pbihzY2FsZWRfdHBtKSwKICAgIC5ncm91cHMgPSAia2VlcCIKICApICU+JQogIHVuZ3JvdXAoKSAlPiUKICBnZ3Bsb3QoYWVzKAogICAgeCA9IHN0YXRlLAogICAgeSA9IG1lYW5fdHBtLAogICAgeW1pbiA9IG1pbl90cG0sCiAgICB5bWF4ID0gbWF4X3RwbSwKICAgIGdyb3VwID0gZmFjdG9ycywKICAgIGNvbG9yID0gZmFjdG9ycwogICkpICsgCiAgZ2VvbV9saW5lKCkgKwogIGdlb21fcmliYm9uKGFscGhhID0gMC41KSArCiAgZmFjZXRfd3JhcCh+Z2VuZSwgbmNvbCA9IDIpICsKICB4bGFiKCJzY2FsZV9UUE0iKQoKZ2dwbG90bHkoZ2dwKQpgYGAKCgoKCgoKCgoKCgoKCgoKCgo=